#!/usr/bin/env python

""" MultiQC: A modular tool to aggregate results from bioinformatics analyses across many samples into a single report
"""

from __future__ import print_function

import base64
from distutils import version
from distutils.dir_util import copy_tree
import errno
import io
import os
import re
import shutil
import subprocess
import sys
import tempfile
import traceback

import click
import jinja2
import pkg_resources

try:
    from urllib.request import urlopen #py3
except ImportError:
    from urllib import urlopen #py2
    # Use UTF-8 encoding by default
    reload(sys)
    sys.setdefaultencoding('utf8')

from multiqc import __version__
from multiqc.plots import table
from multiqc.utils import report, plugin_hooks, config, log
logger = config.logger

@click.command(
    context_settings = dict( help_option_names = ['-h', '--help'] )
)
@click.argument('analysis_dir',
                    type = click.Path(exists=True),
                    nargs = -1,
                    required = True,
                    metavar = "<analysis directory>"
)
@click.option('-f', '--force',
                    is_flag = True,
                    help = "Overwrite any existing reports"
)
@click.option('-d', '--dirs',
                    is_flag = True,
                    help = "Prepend directory to sample names"
)
@click.option('-dd', '--dirs-depth', 'dirs_depth',
                    type = int,
                    help = "Prepend [INT] directories to sample names. Negative number to take from start of path."
)
@click.option('-s', '--fullnames', 'no_clean_sname',
                    is_flag = True,
                    help = "Do not clean the sample names (leave as full file name)"
)
@click.option('-i', '--title',
                    type = str,
                    help = "Report title. Printed as page header, used for filename if not otherwise specified."
)
@click.option('-b', '--comment', 'report_comment',
                    type = str,
                    help = "Custom comment, will be printed at the top of the report."
)
@click.option('-n', '--filename',
                    type = str,
                    help = "Report filename. Use 'stdout' to print to standard out."
)
@click.option('-o', '--outdir',
                    type = str,
                    help = "Create report in the specified output directory."
)
@click.option('-t', '--template',
                    type = click.Choice(config.avail_templates),
                    help = "Report template to use."
)
@click.option('-x', '--ignore',
                    type = str,
                    multiple = True,
                    help = "Ignore analysis files (glob expression)"
)
@click.option('-l', '--file-list',
                    is_flag = True,
                    help = "Supply a file containing a list of file paths to be searched, one per row"
)
@click.option('-e', '--exclude', metavar='[module name]',
                    type = click.Choice(['general_stats']+list(config.avail_modules.keys())),
                    multiple = True,
                    help = "Do not use this module. Can specify multiple times."
)
@click.option('-m', '--module', metavar='[module name]',
                    type = click.Choice(config.avail_modules),
                    multiple = True,
                    help = "Use only this module. Can specify multiple times."
)
@click.option('--data-dir', 'make_data_dir',
                    is_flag = True,
                    help = "Force the parsed data directory to be created."
)
@click.option('--no-data-dir', 'no_data_dir',
                    is_flag = True,
                    help = "Prevent the parsed data directory from being created."
)
@click.option('-k', '--data-format', 'data_format',
                    type = click.Choice(config.data_format_extensions.keys()),
                    help = "Output parsed data in a different format. Default: {}".format(config.data_format)
)
@click.option('-z', '--zip-data-dir', 'zip_data_dir',
                    is_flag = True,
                    help = "Compress the data directory."
)
@click.option('-p', '--export', 'export_plots',
                    is_flag = True,
                    help = "Export plots as static images in addition to the report"
)
@click.option('-fp', '--flat', 'plots_flat',
                    is_flag = True,
                    help = "Use only flat plots (static images)"
)
@click.option('-ip', '--interactive', 'plots_interactive',
                    is_flag = True,
                    help = "Use only interactive plots (HighCharts Javascript)"
)
@click.option('--pdf', 'make_pdf',
                    is_flag = True,
                    help = "Creates PDF report with 'simple' template. Requires Pandoc to be installed."
)
@click.option('-c', '--config', 'config_file',
                    type = click.Path(exists=True, readable=True),
                    help = "Specific config file to load, after those in MultiQC dir / home dir / working dir."
)
@click.option('-v', '--verbose',
                    count = True,
                    default = 0,
                    help = "Increase output verbosity."
)
@click.option('-q', '--quiet',
                    is_flag = True,
                    help = "Only show log warnings"
)
@click.version_option(__version__)

def multiqc(analysis_dir, dirs, dirs_depth, no_clean_sname, title, report_comment, template, module, exclude, outdir,
ignore, file_list, filename, make_data_dir, no_data_dir, data_format, zip_data_dir, force, export_plots,
plots_flat, plots_interactive, make_pdf, config_file, verbose, quiet, **kwargs):
    """MultiQC aggregates results from bioinformatics analyses across many samples into a single report.

        It searches a given directory for analysis logs and compiles a HTML report.
        It's a general use tool, perfect for summarising the output from numerous
        bioinformatics tools.

        To run, supply with one or more directory to scan for analysis results.
        To run here, use 'multiqc .'

        See http://multiqc.info for more details.

        Author: Phil Ewels (http://phil.ewels.co.uk)
    """

    # NOTE: the docstring above is what click shows for `--help`, so developer
    # notes are kept in comments rather than in the docstring.
    #
    # Overview of a run:
    #   1. Set up logging, load config files, apply command line overrides.
    #   2. Work out the output paths; refuse to overwrite unless --force is set.
    #   3. Run each selected module, collecting results in report.modules_output.
    #   4. Move parsed data / exported plots from a temp dir to the output dir.
    #   5. Render the Jinja2 template to the report file (or to stdout).
    #   6. Optionally zip the data directory and convert the report to PDF.
    #
    # Every named argument is supplied by the click decorators above. **kwargs
    # receives any extra (plugin) command line options and is stored on
    # config.kwargs. On normal completion the function calls sys.exit() with 1
    # if any module raised an unexpected exception, otherwise 0.

    # Set up logging level
    loglevel = log.LEVELS.get(min(verbose,1), "INFO")
    if quiet:
        loglevel = 'WARNING'
    log.init_log(logger, loglevel=loglevel)

    # Load config files
    config.mqc_load_userconfig(config_file)
    plugin_hooks.mqc_trigger('config_loaded')

    # Log the command used to launch MultiQC
    report.multiqc_command = " ".join(sys.argv)
    logger.debug("Command used: {}".format(report.multiqc_command))

    # Check that we're running the latest version of MultiQC
    if config.no_version_check is not True:
        try:
            response = urlopen('http://multiqc.info/version.php?v={}'.format(__version__), timeout=5)
            try:
                remote_version = response.read().decode('utf-8')
            finally:
                # Always release the connection, even if reading / decoding fails
                response.close()
            if version.StrictVersion(re.sub(r'[^0-9\.]','', remote_version)) > version.StrictVersion(re.sub(r'[^0-9\.]','', __version__)):
                logger.warning('MultiQC Version {} now available!'.format(remote_version))
        except Exception:
            # Best-effort check only: never let a network / parsing problem stop the run.
            # (Exception rather than a bare except, so that Ctrl-C still works here.)
            logger.debug('Could not connect to multiqc.info for version check')

    # Set up key variables (overwrite config vars from command line)
    if template is not None:
        config.template = template
    if title is not None:
        config.title = title
    if report_comment is not None:
        config.report_comment = report_comment
    config.prepend_dirs = dirs
    if dirs_depth is not None:
        config.prepend_dirs_depth = dirs_depth
    config.analysis_dir = analysis_dir
    if outdir is not None:
        config.output_dir = outdir
    if no_clean_sname:
        config.fn_clean_sample_names = False
        logger.info("Not cleaning sample names")
    if make_data_dir:
        config.make_data_dir = True
    # --no-data-dir is applied last, so it wins if both flags are given
    if no_data_dir:
        config.make_data_dir = False
    config.force = force
    config.zip_data_dir = zip_data_dir
    if data_format is not None:
        config.data_format = data_format
    if export_plots:
        config.export_plots = True
    if plots_flat:
        config.plots_force_flat = True
    if plots_interactive:
        config.plots_force_interactive = True
    if make_pdf:
        # --pdf overrides any template chosen with -t / in the config
        config.template = 'simple'
    config.kwargs = kwargs # Plugin command line options

    plugin_hooks.mqc_trigger('execution_start')

    logger.info("This is MultiQC v{}".format(__version__))
    logger.debug("Command     : {}".format(' '.join(sys.argv)))
    logger.debug("Working dir : {}".format(os.getcwd()))
    if make_pdf:
        logger.info('--pdf specified. Using non-interactive HTML template.')
    logger.info("Template    : {}".format(config.template))

    # Add files if --file-list option is given
    if file_list:
        if len(analysis_dir) > 1:
            raise ValueError("If --file-list is giving, analysis_dir should have only one plain text file.")
        config.analysis_dir = ()
        with (open(analysis_dir[0])) as in_handle:
            for line in in_handle:
                # Lines that do not point at an existing path are silently skipped
                if os.path.exists(line.strip()):
                    config.analysis_dir += (os.path.abspath(line.strip()),)
        if len(config.analysis_dir) == 0:
            logger.error("No files were added from {} using --file-list option.".format(analysis_dir[0]))
            logger.error("Please, check that {} contains correct file paths.".format(analysis_dir[0]))
            raise ValueError("Any files to be searched.")

    if len(ignore) > 0:
        logger.debug("Ignoring files, directories and paths that match: {}".format(", ".join(ignore)))
        # The same glob patterns are applied to file names, dir names and full paths
        config.fn_ignore_files.extend(ignore)
        config.fn_ignore_dirs.extend(ignore)
        config.fn_ignore_paths.extend(ignore)
    if filename == 'stdout':
        config.output_fn = sys.stdout
        logger.info("Printing report to stdout")
    else:
        # NOTE(review): --filename has no click default, so `filename` is None unless
        # the user passes -n. This branch therefore only fires when -n is given with
        # the same value as config.output_fn_name, which looks at odds with the
        # --title help text ("used for filename if not otherwise specified").
        # Left unchanged because fixing it would rename users' reports - confirm intent.
        if title is not None and filename == config.output_fn_name:
            filename = re.sub(r'[^\w\.-]', '', re.sub(r'[-\s]+', '-', title) ).strip()
            filename += '_multiqc_report'
        if filename is not None:
            if filename.endswith('.html'):
                filename = filename[:-5]
            config.output_fn_name = filename
            config.data_dir_name = '{}_data'.format(filename)
        if not config.output_fn_name.endswith('.html'):
            config.output_fn_name = '{}.html'.format(config.output_fn_name)

        # Check that we're not going to overwrite anything before we run
        config.output_fn = os.path.join(config.output_dir, config.output_fn_name)
        config.data_dir = os.path.join(config.output_dir, config.data_dir_name)
        if not config.force:
            if os.path.exists(config.output_fn):
                logger.error("MultiQC Report '{}' already exists.".format(os.path.relpath(config.output_fn)))
                logger.info("Use -f or --force to overwrite existing reports")
                sys.exit(1)
            if config.make_data_dir == True and os.path.exists(config.data_dir):
                logger.error("Output directory '{}' already exists.".format(os.path.relpath(config.data_dir)))
                logger.info("Use -f or --force to overwrite existing reports")
                sys.exit(1)
            if config.export_plots == True:
                # Look inside the output directory (where the plots are written later),
                # not in the current working directory
                existing_plots_dir = os.path.join(config.output_dir, config.plots_dir_name)
                if os.path.exists(existing_plots_dir):
                    logger.error("Plots directory '{}' already exists.".format(os.path.relpath(existing_plots_dir)))
                    logger.info("Use -f or --force to overwrite existing reports")
                    sys.exit(1)
        else:
            logger.debug('Running in --force mode, will overwrite any existing reports.')


    # Print some status updates
    if config.title is not None:
        logger.info("Report title: {}".format(config.title))
    if dirs:
        logger.info("Prepending directory to sample names")
    for d in config.analysis_dir:
        logger.info("Searching '{}'".format(d))

    # Get the list of modules we want to run, in the order that we want them:
    # config.top_modules first, then modules not mentioned in config.module_order,
    # then the remaining modules in config.module_order
    run_modules = [ m for m in config.top_modules if m in config.avail_modules.keys() ]
    run_modules.extend( [ m for m in config.avail_modules.keys() if m not in config.module_order and m not in run_modules ] )
    run_modules.extend( [ m for m in config.module_order if m in config.avail_modules.keys() and m not in run_modules ] )

    # --module takes precedence: --exclude is ignored if both are given
    if module:
        run_modules = [m for m in run_modules if m in module]
        logger.info('Only using modules {}'.format(', '.join(module)))
    elif exclude:
        logger.info("Excluding modules '{}'".format("', '".join(exclude)))
        if 'general_stats' in exclude:
            # general_stats is not a module: it just switches off the stats table
            config.skip_generalstats = True
            exclude = tuple(x for x in exclude if x != 'general_stats')
        run_modules = [m for m in run_modules if m not in exclude]
    if len(run_modules) == 0:
        logger.critical('No analysis modules specified!')
        sys.exit(1)
    logger.debug("Analysing modules: {}".format(', '.join(run_modules)))

    # Create the temporary working directories
    tmp_dir = tempfile.mkdtemp()
    logger.debug('Using temporary directory for creating report: {}'.format(tmp_dir))
    config.data_tmp_dir = os.path.join(tmp_dir, 'multiqc_data')
    if filename != 'stdout' and config.make_data_dir == True:
        # Data is written to the temp dir while modules run; it is moved to the
        # final location further down
        config.data_dir = config.data_tmp_dir
        os.makedirs(config.data_dir)
    else:
        config.data_dir = None
    config.plots_tmp_dir = os.path.join(tmp_dir, 'multiqc_plots')
    if filename != 'stdout' and config.export_plots == True:
        config.plots_dir = config.plots_tmp_dir
        os.makedirs(config.plots_dir)

    # Load the template
    template_mod = config.avail_templates[config.template].load()

    # Add an output subdirectory if specified by template
    try:
        config.output_dir = os.path.join(config.output_dir, template_mod.output_subdir)
    except AttributeError:
        pass # No subdirectory variable given


    # Get the list of files to search
    report.get_filelist()

    def _copy_asset(src, dest):
        # Copy one css / js file to `dest`, creating the parent directory only
        # when it is missing. Several files can share a directory, and an
        # unconditional os.makedirs() raises if the directory already exists.
        dest_dir = os.path.dirname(dest)
        if not os.path.isdir(dest_dir):
            os.makedirs(dest_dir)
        shutil.copyfile(src, dest)

    # Run the modules!
    plugin_hooks.mqc_trigger('before_modules')
    report.modules_output = list()
    sys_exit_code = 0
    for this_module in run_modules:
        try:
            mod = config.avail_modules[this_module].load()
            output = mod()
            # A module may return a single object or a list of them
            if not isinstance(output, list):
                output = [output]
            for m in output:
                report.modules_output.append(m)

            # Copy over css & js files if requested by the theme
            try:
                for to, path in report.modules_output[-1].css.items():
                    _copy_asset(path, os.path.join(tmp_dir, to))
            except AttributeError:
                pass
            try:
                for to, path in report.modules_output[-1].js.items():
                    _copy_asset(path, os.path.join(tmp_dir, to))
            except AttributeError:
                pass

        except UserWarning:
            pass # No samples found
        except KeyboardInterrupt:
            shutil.rmtree(tmp_dir)
            logger.critical(
                    "User Cancelled Execution!\n{eq}\n{tb}{eq}\n"
                    .format(eq=('='*60), tb=traceback.format_exc())+
                    "User Cancelled Execution!\nExiting MultiQC...")
            sys.exit(1)
        except:
            # Deliberately broad: one broken module must not stop the others.
            # Flag the error, but carry on
            logger.error("Oops! The '{}' MultiQC module broke... \n".format(this_module) + \
                      (' '*20)+"Please copy the following traceback and report it at " + \
                      "https://github.com/ewels/MultiQC/issues \n" + \
                      (' '*20)+"(if possible, include a log file that triggers the error) \n" + \
                      ('='*60)+"\nModule {} raised an exception: {}".format(
                          this_module, traceback.format_exc()) + ('='*60))
            sys_exit_code = 1

    # Did we find anything?
    if len(report.modules_output) == 0:
        logger.warning("No analysis results found. Cleaning up..")
        shutil.rmtree(tmp_dir)
        logger.info("MultiQC complete")
        # Exit with an error code if a module broke
        sys.exit(sys_exit_code)

    plugin_hooks.mqc_trigger('after_modules')

    # Generate the General Statistics HTML & write to file
    if len(report.general_stats_data) > 0:
        pconfig = {
            'id': 'general_stats_table',
            'table_title': 'General Statistics',
            'save_file': True,
            'raw_data_fn':'multiqc_general_stats'
        }
        report.general_stats_html = table.plot(report.general_stats_data, report.general_stats_headers, pconfig)
    else:
        config.skip_generalstats = True

    # Write the report sources to disk
    if config.data_dir is not None:
        report.data_sources_tofile()

    plugin_hooks.mqc_trigger('before_report_generation')

    # Make the final report path & data directories
    if filename != 'stdout':
        # Check for existing reports and remove if -f was specified
        # We repeat this check in case the output name has been altered since launch
        config.output_fn = os.path.join(config.output_dir, config.output_fn_name)
        if os.path.exists(config.output_fn):
            if config.force:
                logger.warning("Deleting    : {}   (-f was specified)".format(os.path.relpath(config.output_fn)))
                os.remove(config.output_fn)
            else:
                logger.error("MultiQC Report {} already exists.".format(config.output_fn))
                logger.info("Use -f or --force to overwrite existing reports")
                shutil.rmtree(tmp_dir)
                sys.exit(1)
        # Make directories for report if not already existing
        if not os.path.exists(os.path.dirname(config.output_fn)):
            os.makedirs(os.path.dirname(config.output_fn))
        logger.info("Report      : {}".format(os.path.relpath(config.output_fn)))

        # Now do the same for the data directory
        if config.make_data_dir == False:
            logger.info("Data        : None")
        else:
            config.data_dir = os.path.join(config.output_dir, config.data_dir_name)
            if os.path.exists(config.data_dir):
                if config.force:
                    logger.warning("Deleting    : {}   (-f was specified)".format(os.path.relpath(config.data_dir)))
                    shutil.rmtree(config.data_dir)
                else:
                    logger.error("Output directory {} already exists.".format(config.data_dir))
                    logger.info("Use -f or --force to overwrite existing reports")
                    shutil.rmtree(tmp_dir)
                    sys.exit(1)
            os.makedirs(config.data_dir)
            logger.info("Data        : {}".format(os.path.relpath(config.data_dir)))

            # Modules have run, so data directory should be complete by now. Move its contents.
            for f in os.listdir(config.data_tmp_dir):
                fn = os.path.join(config.data_tmp_dir, f)
                logger.debug("Moving data file from '{}' to '{}'".format(fn, config.data_dir))
                shutil.move(fn, config.data_dir)

        # Finally, copy across the plots
        if config.export_plots:
            config.plots_dir = os.path.join(config.output_dir, config.plots_dir_name)
            if os.path.exists(config.plots_dir):
                if config.force:
                    logger.warning("Deleting    : {}   (-f was specified)".format(os.path.relpath(config.plots_dir)))
                    shutil.rmtree(config.plots_dir)
                else:
                    logger.error("Output directory {} already exists.".format(config.plots_dir))
                    logger.info("Use -f or --force to overwrite existing reports")
                    shutil.rmtree(tmp_dir)
                    sys.exit(1)
            os.makedirs(config.plots_dir)
            logger.info("Plots       : {}".format(os.path.relpath(config.plots_dir)))

            # Modules have run, so plots directory should be complete by now. Move its contents.
            for f in os.listdir(config.plots_tmp_dir):
                fn = os.path.join(config.plots_tmp_dir, f)
                logger.debug("Moving plots directory from '{}' to '{}'".format(fn, config.plots_dir))
                shutil.move(fn, config.plots_dir)

    plugin_hooks.mqc_trigger('before_template')

    # Load in parent template files first if a child theme
    try:
        parent_template = config.avail_templates[template_mod.template_parent].load()
        copy_tree(parent_template.template_dir, tmp_dir)
    except AttributeError:
        pass # Not a child theme

    # Copy the template files to the tmp directory (distutils overwrites parent theme files)
    copy_tree(template_mod.template_dir, tmp_dir)

    # Function to include file contents in Jinja template
    def include_file(name, fdir=tmp_dir, b64=False):
        # Returns the file as UTF-8 text, or as a base64 string if b64 is set
        if b64:
            with io.open (os.path.join(fdir, name), "rb") as f:
                return base64.b64encode(f.read()).decode('utf-8')
        else:
            with io.open (os.path.join(fdir, name), "r", encoding='utf-8') as f:
                return f.read()

    # Load the report template
    try:
        env = jinja2.Environment(loader=jinja2.FileSystemLoader(tmp_dir))
        env.globals['include_file'] = include_file
        j_template = env.get_template(template_mod.base_fn)
    except Exception:
        raise IOError ("Could not load {} template file '{}'".format(config.template, template_mod.base_fn))

    # Use jinja2 to render the template and overwrite
    config.analysis_dir = [os.path.realpath(d) for d in config.analysis_dir]
    report_output = j_template.render(report=report, config=config)
    if filename == 'stdout':
        report_bytes = report_output.encode('utf-8')
        stdout_bytes_stream = getattr(sys.stdout, 'buffer', None)
        if stdout_bytes_stream is not None:
            # Python 3: print() on a bytes object would emit its b'...' repr, so
            # write the UTF-8 bytes straight to the underlying binary stream
            sys.stdout.flush()
            stdout_bytes_stream.write(report_bytes + b'\n')
            stdout_bytes_stream.flush()
        else:
            # Python 2 (or a stdout replacement without .buffer)
            print(report_bytes, file = sys.stdout)
    else:
        try:
            with io.open (config.output_fn, "w", encoding='utf-8') as f:
                print(report_output, file=f)
        except IOError as e:
            raise IOError ("Could not print report to '{}' - {}".format(config.output_fn, IOError(e)))

        # Copy over files if requested by the theme
        try:
            for f in template_mod.copy_files:
                fn = os.path.join(tmp_dir, f)
                dest_dir = os.path.join( os.path.dirname(config.output_fn), f)
                copy_tree(fn, dest_dir)
        except AttributeError:
            pass # No files to copy

    # Clean up temporary directory
    shutil.rmtree(tmp_dir)

    # Zip the data directory if requested
    if config.zip_data_dir and config.data_dir is not None:
        shutil.make_archive(config.data_dir, 'zip', config.data_dir)
        shutil.rmtree(config.data_dir)

    # Try to create a PDF if requested
    if make_pdf:
        try:
            # NOTE(review): the PDF name has no directory part, so pandoc writes it
            # relative to the working directory rather than config.output_dir - confirm
            pdf_fn_name = config.output_fn_name.replace('.html', '.pdf')
            pandoc_call = [
                'pandoc',
                '--standalone',
                config.output_fn,
                '--output', pdf_fn_name,
                '--latex-engine=xelatex',
                '-V', 'documentclass=article',
                '-V', 'geometry=margin=1in',
                '-V', 'title='
            ]
            if config.pandoc_template is not None:
                pandoc_call.append('--template={}'.format(config.pandoc_template))
            logger.debug("Attempting Pandoc conversion to PDF with following command:\n{}".format(' '.join(pandoc_call)))
            pdf_exit_code = subprocess.call(pandoc_call)
            if pdf_exit_code != 0:
                logger.error("Error creating PDF! Pandoc returned a non-zero exit code.")
            else:
                logger.info("PDF Report  : {}".format(pdf_fn_name))
        except OSError as e:
            # errno module rather than os.errno, which no longer exists on Python 3.7+
            if e.errno == errno.ENOENT:
                logger.error('Error creating PDF - pandoc not found. Is it installed? http://pandoc.org/')
            else:
                logger.error("Error creating PDF! Something went wrong when creating the PDF\n"+
                    ('='*60)+"\n{}\n".format(traceback.format_exc()) + ('='*60))

    plugin_hooks.mqc_trigger('execution_finish')

    logger.info("MultiQC complete")

    # Move the log file into the data directory
    log.copy_tmp_log(logger)

    # Exit with an error code if a module broke
    sys.exit(sys_exit_code)


if __name__ == "__main__":
    # Plugins can register extra command line options under the
    # 'multiqc.cli_options.v1' entry point group. Each registered callable
    # takes the command and returns it with its options attached, so they
    # are applied one after another before the command is finally invoked.
    cli_option_plugins = pkg_resources.iter_entry_points('multiqc.cli_options.v1')
    for plugin in cli_option_plugins:
        add_cli_options = plugin.load()
        multiqc = add_cli_options(multiqc)
    multiqc()
